#Load Libraries and dataset
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(tidytuesdayR)
# Download the TidyTuesday release for 2024, week 32. The recorded output
# below shows that this performs a download, so it needs network access.
tuesdata <- tt_load(2024, week = 32)
## --- Compiling #TidyTuesday Information for 2024-08-06 ----
## --- There is 1 file available ---
## --- Starting Download ---
##
## Downloading file 1 of 1: `olympics.csv`
## --- Download complete ---
# Keep the single `olympics` table; the rest of the script refers to it as
# `oly`.
oly <- tuesdata$olympics
# Make theme_minimal() the session-wide default ggplot2 theme.
theme_set(theme_minimal())
# Unique combinations of athlete (id, name), NOC, year, season and host city.
# distinct() both selects these columns and removes duplicate rows, and it
# returns an ungrouped tibble, so no stray grouping leaks into later verbs.
# NOTE(review): `df` is not referenced again in the visible script (the vis()
# calls pass `oly`) -- confirm whether it was meant to be the input to vis().
df <- oly %>%
  distinct(id, name, noc, year, season, city)
#' Stacked-area chart of participation for the five largest delegations
#'
#' Finds the five NOCs with the most rows in `data` for the chosen season,
#' counts their rows per year within that same season, and draws the counts
#' as an interactive stacked area chart.
#'
#' @param data Data frame with at least `noc`, `year` and `season` columns.
#'   Every row counts once, so pass one row per athlete per Games if the
#'   y axis should show athletes rather than individual event entries.
#' @param seas Season to plot: "Summer" (the default) or "Winter".
#' @return A plotly object.
vis <- function(data, seas = c("Summer", "Winter")) {
  # Collapse the default choice vector to one validated season. Without this,
  # `season == seas` would recycle c("Summer", "Winter") across the rows and
  # the title would become a length-2 vector.
  seas <- match.arg(seas)

  # Restrict to the requested season once, so that both the ranking and the
  # yearly counts describe the Games named in the title. ungroup() guards
  # against grouped input, which would otherwise leak into count().
  season_rows <- data %>%
    ungroup() %>%
    filter(season == seas)

  # Five NOCs with the most rows in this season (ties at the cut-off are
  # kept, as they were with top_n()).
  top_noc <- season_rows %>%
    count(noc, name = "total_count") %>%
    slice_max(total_count, n = 5) %>%
    pull(noc)

  # Rows per year for each of those NOCs, ordered for plotting.
  yearly <- season_rows %>%
    filter(noc %in% top_noc) %>%
    count(year, noc, name = "count") %>%
    arrange(year, noc)

  plot_ly(
    data = yearly,
    x = ~year,
    y = ~count,
    type = "scatter",
    color = ~noc,
    mode = "lines",
    # A shared stackgroup stacks the per-NOC traces into an area chart.
    stackgroup = "one",
    hoverinfo = "text",
    hovertext = ~paste0(
      "Country: ", noc,
      "<br>Count: ", count,
      "<br>Year:", year
    )
  ) %>%
    layout(
      title = paste0("Top 5 country participation - ", seas, " Olympics"),
      yaxis = list(title = "Total Athletes"),
      xaxis = list(title = "Year"),
      legend = list(title = list(text = "<b>Country</b>"))
    )
}
# Render the stacked-area chart for the Winter Games.
# NOTE(review): this passes the raw `oly` table, so vis() counts every row;
# the deduplicated `df` built above is never used -- confirm which was meant.
vis(oly, 'Winter')
## Selecting by total_count
# top_noc <- oly %>% filter(season == 'Winter') %>%
# group_by(noc) %>%
# summarise(total_count = n()) %>%
# ungroup() %>%
# top_n(5) %>% pull(noc)
#
# oly %>%
# filter(noc %in% top_noc) %>%
# group_by(year, noc) %>%
# summarise(count = n(), .groups = 'drop') %>%
# ungroup() %>%
# group_by(noc) %>%
# mutate(label = ifelse(row_number() == n(), paste0(noc, " - ", count), NA)) %>%
# ungroup() %>%
# arrange(year, noc) %>%
# ggplot(aes(x = year, y = count, fill = noc))+geom_area()+
# #geom_line(alpha = 0.6, linewidth = 1) +
# #geom_text(nudge_x = 3, size = 4)+
# labs(title = paste0("Top 5 Country Participation - Olympics"))+
# ylab("Number of athletes")+
# theme_bw()+
# theme(legend.position = 'bottom',
# legend.title = element_blank())
# Render the stacked-area chart for the Summer Games.
# NOTE(review): this passes the raw `oly` table, so vis() counts every row;
# the deduplicated `df` built above is never used -- confirm which was meant.
vis(oly, 'Summer')
## Selecting by total_count
# Use the function below to visualise how athletes' height or weight has
# changed over the years at the Olympics.
#' Per-Games box plots of athlete height or weight for one sport and sex
#'
#' Draws one box per Olympic year for the chosen measurement and overlays a
#' loess trend line. Rows with a missing measurement are left in the data and
#' dropped by ggplot2 with a warning.
#'
#' Note: this name masks graphics::boxplot() for the rest of the session.
#'
#' @param sports Sport to keep; matched exactly against the `sport` column.
#' @param gen Sex to keep, "Male" or "Female". A `sex` value of "M" is
#'   treated as "Male" and any other non-missing value as "Female".
#' @param metric Measurement to plot: "height" (the default) or "weight".
#' @param data Data frame with `sport`, `sex`, `year` and the metric column.
#'   Defaults to the script-level `oly` table.
#' @return A ggplot object.
boxplot <- function(sports, gen, metric = c('height', 'weight'), data = oly) {
  # Collapse the default choice vector to one validated value; a length-2
  # condition in `if` is an error in current R.
  metric <- match.arg(metric)

  axis_label <- c(height = "Height (cm)", weight = "Weight (kg)")[[metric]]
  title_noun <- c(height = "Heights", weight = "Weights")[[metric]]

  data %>%
    # Filter on sport first so the sex recode only touches relevant rows.
    filter(sport == sports) %>%
    mutate(sex = ifelse(sex == "M", "Male", "Female")) %>%
    filter(sex == gen) %>%
    ggplot(aes(x = year, y = .data[[metric]])) +
    geom_boxplot(aes(group = factor(year))) +
    geom_smooth(method = "loess", formula = "y~x", se = FALSE) +
    # Decade ticks from 1930 to 2010.
    scale_x_continuous(breaks = seq(1930, 2010, by = 10)) +
    labs(
      x = NULL,
      y = axis_label,
      title = paste0(
        title_noun, " of ", gen, " ", sports, " athletes in the Olympics"
      ),
      caption = "Source: Kaggle Olympics history data"
    ) +
    theme_bw()
}
# Heights of male basketball players by Olympic year. The warnings recorded
# below show ggplot2 dropping the rows that have no height value.
boxplot("Basketball", "Male", 'height')
## Warning: Removed 650 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 650 rows containing non-finite outside the scale range
## (`stat_smooth()`).
# Weights of female gymnasts by Olympic year. The warnings recorded below
# show ggplot2 dropping the rows that have no weight value.
boxplot("Gymnastics", "Female", 'weight')
## Warning: Removed 1552 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 1552 rows containing non-finite outside the scale range
## (`stat_smooth()`).